library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
data('gravier', package = 'datamicroarray')
table(gravier$y)
#>
#> good poor
#> 111 57
gravierset <- as.data.frame(cbind(class=1*(gravier$y=="poor"),gravier$x))
gravier <- NULL
studyName <- "GRAVIER"
dataframe <- gravierset
outcome <- "class"
TopVariables <- 10
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 168 | 2905 |
pander::pander(table(dataframe[,outcome]))
| 0 | 1 |
|---|---|
| 111 | 57 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
### Some global cleaning
sdiszero <- apply(dataframe,2,sd) > 1.0e-16
dataframe <- dataframe[,sdiszero]
varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.9999)),outcome)
dataframe <- dataframe[,tokeep]
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
dataframe <- FRESAScale(dataframe,method="OrderLogit")$scaledData
hm <- heatMaps(data=dataframe,
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
cexCol=0.15,
cexRow=0.25
)
par(op)
The heat map of the data
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = 0.15,
cexCol = 0.15,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
DEdataframe <- IDeA(dataframe,verbose=TRUE)
#>
#> Included: 2905 , Uni p: 0.005278412 , Uncorrelated Base: 2315 , Outcome-Driven Size: 0 , Base Size: 2315
#>
#>
1 <R=0.965,w= 1,N= 2>, Top: 1( 1 )[ 1 : 1 : 0.957 ]( 1 , 1 , 0 ),<|>Tot Used: 2 , Added: 1 , Zero Std: 0 , Max Cor: 0.956
#>
2 <R=0.956,w= 2,N= 35>, Top: 12( 3 )[ 1 : 12 : 0.928 ]( 12 , 20 , 1 ),<|>Tot Used: 33 , Added: 20 , Zero Std: 0 , Max Cor: 0.925
#>
3 <R=0.925,w= 2,N= 35>, Top: 16( 1 )[ 1 : 16 : 0.912 ]( 16 , 18 , 12 ),<|>Tot Used: 64 , Added: 18 , Zero Std: 0 , Max Cor: 0.912
#>
4 <R=0.912,w= 2,N= 35>, Top: 13( 1 )[ 1 : 13 : 0.906 ]( 13 , 14 , 25 ),<|>Tot Used: 85 , Added: 14 , Zero Std: 0 , Max Cor: 0.905
#>
5 <R=0.905,w= 2,N= 35>, Top: 7( 1 )[ 1 : 7 : 0.903 ]( 6 , 6 , 34 ),<|>Tot Used: 94 , Added: 6 , Zero Std: 0 , Max Cor: 0.902
#>
6 <R=0.902,w= 2,N= 35>, Top: 5( 1 )[ 1 : 5 : 0.901 ]( 5 , 5 , 38 ),<|>Tot Used: 102 , Added: 5 , Zero Std: 0 , Max Cor: 0.900
#>
7 <R=0.900,w= 2,N= 35>, Top: 2( 1 )[ 1 : 2 : 0.900 ]( 2 , 2 , 41 ),<|>Tot Used: 105 , Added: 2 , Zero Std: 0 , Max Cor: 0.900
#>
8 <R=0.900,w= 3,N= 269>, Top: 87( 5 )[ 1 : 87 : 0.850 ]( 87 , 148 , 43 ),<|>Tot Used: 312 , Added: 148 , Zero Std: 0 , Max Cor: 0.897
#>
9 <R=0.897,w= 3,N= 269>, Top: 15( 2 )[ 1 : 15 : 0.849 ]( 15 , 18 , 114 ),<|>Tot Used: 342 , Added: 18 , Zero Std: 0 , Max Cor: 0.848
#>
10 <R=0.848,w= 4,N= 484>, Top: 166( 1 ).[ 1 : 166 : 0.800 ]( 159 , 229 , 126 ),<|>Tot Used: 666 , Added: 229 , Zero Std: 0 , Max Cor: 0.839
#>
11 <R=0.839,w= 4,N= 484>, Top: 14( 1 )[ 1 : 14 : 0.800 ]( 14 , 22 , 247 ),<|>Tot Used: 695 , Added: 22 , Zero Std: 0 , Max Cor: 0.804
#>
12 <R=0.804,w= 5,N= 2>, Top: 1( 1 )[ 1 : 1 : 0.800 ]( 1 , 1 , 258 ),<|>Tot Used: 697 , Added: 1 , Zero Std: 0 , Max Cor: 0.800
#>
13 <R=0.000,w= 6,N= 0>
#>
[ 13 ], 0.7978739 Decor Dimension: 697 . Cor to Base: 407 , ABase: 178 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
3891
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
3463
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
5
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
4.8
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPSTM <- attr(DEdataframe,"UPSTM")
gplots::heatmap.2(1.0*(abs(UPSTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = 0.15,
cexCol = 0.15,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
cormat <- cor(DEdataframe[,varlistc],method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Correlation after IDeA",
cexRow = 0.15,
cexCol = 0.15,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
par(op)
print(max(abs(cormat)))
[1] 0.7999088
classes <- unique(dataframe[,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
datasetframe.umap = umap(scale(dataframe[,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[,outcome],col=raincolors[dataframe[,outcome]+1])
datasetframe.umap = umap(scale(DEdataframe[,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[,outcome],col=raincolors[DEdataframe[,outcome]+1])
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
100 : g1A07 200 : g3H03 300 : g1E07 400 : g4F03 500 : g4C05
600 : g1int292 700 : g1int356 800 : g7A03 900 : g4D11 1000 :
g1int577
1100 : g1CNS420 1200 : g7G07 1300 : g1int785 1400 : g1CNS59 1500 :
g1CNS178
1600 : g1int949 1700 : g1int1028 1800 : g1int1089 1900 : g11D05 2000 :
g1int1222
2100 : g1int1298 2200 : g1int1376 2300 : g1int1449 2400 : g10E08 2500 :
g1CNS90
2600 : g7F11 2700 : g1int1693 2800 : g1CNS93 2900 : g1int1800
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
100 : g1A07 200 : g3H03 300 : La_g1E07 400 : g4F03 500 : g4C05
600 : g1int292 700 : g1int356 800 : g7A03 900 : g4D11 1000 :
g1int577
1100 : g1CNS420 1200 : La_g7G07 1300 : g1int785 1400 : La_g1CNS59 1500 :
g1CNS178
1600 : g1int949 1700 : g1int1028 1800 : La_g1int1089 1900 : g11D05 2000
: g1int1222
2100 : g1int1298 2200 : g1int1376 2300 : La_g1int1449 2400 : g10E08 2500
: g1CNS90
2600 : g7F11 2700 : g1int1693 2800 : La_g1CNS93 2900 : g1int1800
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| g1CNS507 | -0.812 | 0.941 | 0.1840 | 0.855 | 0.11198 | 0.796 |
| g1CNS105 | 0.832 | 1.160 | -0.0863 | 0.836 | 0.01027 | 0.749 |
| g1CNS382 | -0.745 | 1.076 | 0.1514 | 0.940 | 0.12716 | 0.745 |
| g1int804 | -0.656 | 0.949 | 0.1149 | 0.889 | 0.02827 | 0.743 |
| g1CNS91 | 0.887 | 1.174 | -0.0348 | 0.695 | 0.00370 | 0.742 |
| g1CNS26 | -0.722 | 1.108 | 0.1852 | 0.855 | 0.93893 | 0.738 |
| g1int340 | -0.957 | 1.291 | 0.1553 | 1.165 | 0.08048 | 0.737 |
| g1CNS70 | 0.898 | 1.293 | -0.0759 | 0.978 | 0.06554 | 0.731 |
| g1CNS158 | 0.846 | 1.168 | 0.0216 | 0.890 | 0.00114 | 0.731 |
| g1CNS28 | 0.838 | 1.180 | -0.0334 | 0.916 | 0.00650 | 0.726 |
finalTable <- univarDe$orderframe[topvar,univariate_columns]
pander::pander(univarDe$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| g1CNS507 | -0.812 | 0.941 | 0.1840 | 0.855 | 0.11198 | 0.796 |
| g1CNS105 | 0.832 | 1.160 | -0.0863 | 0.836 | 0.01027 | 0.749 |
| g1int340 | -0.957 | 1.291 | 0.1553 | 1.165 | 0.08048 | 0.737 |
| g1CNS158 | 0.846 | 1.168 | 0.0216 | 0.890 | 0.00114 | 0.731 |
| g8D02 | -0.776 | 1.372 | 0.2052 | 1.194 | 0.14136 | 0.725 |
| g1int1671 | 0.614 | 1.133 | -0.1787 | 1.055 | 0.20931 | 0.719 |
| g9E01 | 0.876 | 1.334 | -0.0751 | 1.067 | 0.08676 | 0.717 |
| g1int812 | -0.571 | 1.013 | 0.1304 | 0.840 | 0.27965 | 0.716 |
| g1CNS74 | 0.645 | 1.185 | -0.1661 | 1.093 | 0.41448 | 0.716 |
| g8F04 | 0.511 | 0.943 | -0.2328 | 0.953 | 0.80241 | 0.710 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
| mean | total | fraction |
|---|---|---|
| 2 | 484 | 0.167 |
dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
coef <- theFormulas[[dx]]
cname <- names(theFormulas[[dx]])
names(cname) <- cname
for (cf in names(coef))
{
if (cf != dx)
{
if (coef[cf]>0)
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
}
else
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("%5.3f*%s",coef[cf],cname[cf]))
}
}
}
}
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")
pander::pander(finalTable[,Final_Columns])
| DecorFormula | caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | RAWAUC | fscores | |
|---|---|---|---|---|---|---|---|---|---|
| g1CNS507 | -0.812 | 0.941 | 0.1840 | 0.855 | 0.11198 | 0.796 | 0.796 | 1 | |
| g1CNS105 | 0.832 | 1.160 | -0.0863 | 0.836 | 0.01027 | 0.749 | 0.749 | 3 | |
| g1int340 | -0.957 | 1.291 | 0.1553 | 1.165 | 0.08048 | 0.737 | 0.737 | NA | |
| g1CNS158 | 0.846 | 1.168 | 0.0216 | 0.890 | 0.00114 | 0.731 | 0.731 | 11 | |
| g8D02 | -0.776 | 1.372 | 0.2052 | 1.194 | 0.14136 | 0.725 | 0.725 | NA | |
| g1int1671 | 0.614 | 1.133 | -0.1787 | 1.055 | 0.20931 | 0.719 | 0.719 | NA | |
| g9E01 | 0.876 | 1.334 | -0.0751 | 1.067 | 0.08676 | 0.717 | 0.717 | NA | |
| g1int812 | -0.571 | 1.013 | 0.1304 | 0.840 | 0.27965 | 0.716 | 0.716 | 11 | |
| g1CNS74 | 0.645 | 1.185 | -0.1661 | 1.093 | 0.41448 | 0.716 | 0.716 | NA | |
| g8F04 | 0.511 | 0.943 | -0.2328 | 0.953 | 0.80241 | 0.710 | 0.710 | NA |